#!/usr/bin/env python

import subprocess
import uuid
import re
import json
import sys
import ast
import requests
from operator import itemgetter
from heapq import nlargest
from itertools import repeat, ifilter


# Keys in the ceph config-key store under which this tool keeps its state.
CLUSTER_UUID_NAME = 'cluster-uuid'
CLUSTER_OWNERSHIP_NAME = 'cluster-ownership'

# Switched on by the -v/--verbose command-line flag in main().
verbose = False


class Counter(dict):
    '''Dict subclass for counting hashable objects.  Sometimes called a bag
    or multiset.  Elements are stored as dictionary keys and their counts
    are stored as dictionary values.

    Only dict methods available on both Python 2 and Python 3 are used
    internally, so the class behaves the same on either interpreter.

    >>> Counter('zyzygy')
    Counter({'y': 3, 'z': 2, 'g': 1})

    '''

    def __init__(self, iterable=None, **kwds):
        '''Create a new, empty Counter object.  And if given, count elements
        from an input iterable.  Or, initialize the count from another mapping
        of elements to their counts.

        >>> c = Counter()                           # a new, empty counter
        >>> c = Counter('gallahad')                 # a new counter from an iterable
        >>> c = Counter({'a': 4, 'b': 2})           # a new counter from a mapping
        >>> c = Counter(a=4, b=2)                   # a new counter from keyword args

        '''
        self.update(iterable, **kwds)

    def __missing__(self, key):
        'Report a count of zero for absent elements without storing them.'
        return 0

    def most_common(self, n=None):
        '''List the n most common elements and their counts from the most
        common to the least.  If n is None, then list all element counts.

        >>> Counter('abracadabra').most_common(3)
        [('a', 5), ('r', 2), ('b', 2)]

        '''
        if n is None:
            return sorted(self.items(), key=itemgetter(1), reverse=True)
        return nlargest(n, self.items(), key=itemgetter(1))

    def elements(self):
        '''Iterator over elements repeating each as many times as its count.

        >>> c = Counter('ABCABC')
        >>> sorted(c.elements())
        ['A', 'A', 'B', 'B', 'C', 'C']

        If an element's count has been set to zero or is a negative number,
        elements() will ignore it.

        '''
        for elem, count in self.items():
            # repeat() yields nothing for counts <= 0.
            for _ in repeat(None, count):
                yield elem

    # Override dict methods where the meaning changes for Counter objects.

    @classmethod
    def fromkeys(cls, iterable, v=None):
        'Always raises NotImplementedError; use Counter(iterable) instead.'
        raise NotImplementedError(
            'Counter.fromkeys() is undefined.  Use Counter(iterable) instead.')

    def update(self, iterable=None, **kwds):
        '''Like dict.update() but add counts instead of replacing them.

        Source can be an iterable, a dictionary, or another Counter instance.

        >>> c = Counter('which')
        >>> c.update('witch')           # add elements from another iterable
        >>> d = Counter('watch')
        >>> c.update(d)                 # add elements from another counter
        >>> c['h']                      # four 'h' in which, witch, and watch
        4

        '''
        if iterable is not None:
            # A source is treated as a mapping when it offers iteritems()
            # (Python 2 dicts) or items() (Python 3 dicts).
            pairs = (getattr(iterable, 'iteritems', None) or
                     getattr(iterable, 'items', None))
            if pairs is not None:
                if self:
                    self_get = self.get
                    for elem, count in pairs():
                        self[elem] = self_get(elem, 0) + count
                else:
                    dict.update(self, iterable) # fast path when counter is empty
            else:
                self_get = self.get
                for elem in iterable:
                    self[elem] = self_get(elem, 0) + 1
        if kwds:
            self.update(kwds)

    def copy(self):
        'Like dict.copy() but returns an instance of the same class, not a dict.'
        return self.__class__(self)

    def __delitem__(self, elem):
        'Like dict.__delitem__() but does not raise KeyError for missing values.'
        if elem in self:
            dict.__delitem__(self, elem)

    def __repr__(self):
        if not self:
            return '%s()' % self.__class__.__name__
        items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
        return '%s({%s})' % (self.__class__.__name__, items)

    # Multiset-style mathematical operations discussed in:
    #       Knuth TAOCP Volume II section 4.6.3 exercise 19
    #       and at http://en.wikipedia.org/wiki/Multiset
    #
    # Outputs guaranteed to only include positive counts.
    #
    # To strip negative and zero counts, add-in an empty counter:
    #       c += Counter()

    def __add__(self, other):
        '''Add counts from two counters.

        >>> Counter('abbb') + Counter('bcc')
        Counter({'b': 4, 'c': 2, 'a': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        for elem in set(self) | set(other):
            newcount = self[elem] + other[elem]
            if newcount > 0:
                result[elem] = newcount
        return result

    def __sub__(self, other):
        ''' Subtract count, but keep only results with positive counts.

        >>> Counter('abbbc') - Counter('bccd')
        Counter({'b': 2, 'a': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        for elem in set(self) | set(other):
            newcount = self[elem] - other[elem]
            if newcount > 0:
                result[elem] = newcount
        return result

    def __or__(self, other):
        '''Union is the maximum of value in either of the input counters.

        >>> Counter('abbb') | Counter('bcc')
        Counter({'b': 3, 'c': 2, 'a': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        for elem in set(self) | set(other):
            newcount = max(self[elem], other[elem])
            if newcount > 0:
                result[elem] = newcount
        return result

    def __and__(self, other):
        ''' Intersection is the minimum of corresponding counts.

        >>> Counter('abbb') & Counter('bcc')
        Counter({'b': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        # Walk the smaller counter and probe the larger one.
        if len(self) < len(other):
            smaller, larger = self, other
        else:
            smaller, larger = other, self
        for elem in smaller:
            if elem in larger:
                newcount = min(self[elem], other[elem])
                if newcount > 0:
                    result[elem] = newcount
        return result


def run_command(cmd):
  '''Run cmd through subprocess.Popen and wait for it to exit.

  Both output streams are captured.  Returns a tuple
  (returncode, stdout, stderr).  When the module-level verbose flag is
  set, the command is echoed to stdout before it is started.
  '''
  if verbose:
    sys.stdout.write("run_command: " + str(cmd) + "\n")
  proc = subprocess.Popen(cmd,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
  captured_out, captured_err = proc.communicate()
  return (proc.returncode, captured_out, captured_err)


def get_uuid():
  '''Return the cluster UUID, creating and storing one if none is found.

  The value is read with 'ceph config-key get' under CLUSTER_UUID_NAME.
  If that command fails, a new uuid4 is generated and written back with
  'ceph config-key put'.

  Raises RuntimeError if the freshly generated UUID cannot be stored.
  '''
  rc, uid, err = run_command(['ceph', 'config-key', 'get', CLUSTER_UUID_NAME])
  # Compare exit codes by value; "is not 0" only worked because CPython
  # happens to cache small integers.
  if rc != 0:
    # uuid is not yet set.
    uid = str(uuid.uuid4())
    rc, _, err = run_command(['ceph', 'config-key', 'put',
                              CLUSTER_UUID_NAME, uid])
    if rc != 0:
      raise RuntimeError("\'ceph config-key put\' failed -" + err)

  return uid

def bytes_pretty_to_raw(byte_count, byte_scale):
  '''Convert a scaled byte count into a raw number of bytes.

  byte_count -- integer quantity expressed in units of byte_scale
  byte_scale -- one of 'kB', 'MB', 'GB', 'TB', 'PB', 'EB' (binary
                multiples: 2**10, 2**20, ...)

  Returns byte_count multiplied by the unit size.  An unrecognised scale
  returns byte_count unchanged.
  '''
  shifts = {'kB': 10, 'MB': 20, 'GB': 30, 'TB': 40, 'PB': 50, 'EB': 60}
  shift = shifts.get(byte_scale)
  if shift is None:
    return byte_count
  # Scaling up to raw bytes is a left shift; the previous right shift
  # divided instead, turning e.g. 5 kB into 0.
  return byte_count << shift

def get_nums():
  '''Collect component counts for the cluster.

  Parses the JSON output of 'ceph -s' and 'ceph pg dump pools' and
  returns a dict with the keys num_mons, num_osds, num_mdss, num_pgs,
  num_data_bytes, num_bytes_total, num_pools and num_objects.

  Raises RuntimeError if either ceph command exits with a non-zero code.
  '''
  rc, o, e = run_command(['ceph', '-s', '-f', 'json'])
  # Exit codes are compared by value, not by object identity.
  if rc != 0:
    raise RuntimeError("\'ceph -s\' failed - " + e)

  oj = json.loads(o)
  num_mons = len(oj['monmap']['mons'])
  num_osds = int(oj['osdmap']['osdmap']['num_in_osds'])
  num_mdss = oj['mdsmap']['in']

  pgmap = oj['pgmap']
  num_pgs = pgmap['num_pgs']
  num_data_bytes = pgmap['data_bytes']
  num_bytes_total = pgmap['bytes_total']

  rc, o, e = run_command(['ceph', 'pg', 'dump', 'pools', '-f', 'json-pretty'])
  if rc != 0:
    raise RuntimeError("\'ceph pg dump pools\' failed - " + e)

  pools = json.loads(o)
  # Object total is the sum over every pool's stat_sum entry.
  num_objs = sum(p['stat_sum']['num_objects'] for p in pools)

  return {'num_mons': num_mons,
          'num_osds': num_osds,
          'num_mdss': num_mdss,
          'num_pgs': num_pgs,
          'num_data_bytes': num_data_bytes,
          'num_bytes_total': num_bytes_total,
          'num_pools': len(pools),
          'num_objects': num_objs}

def get_crush_types():
  '''Summarise the CRUSH map as a list of {'type': name, 'count': n} dicts.

  Buckets reported by 'ceph osd crush dump' are counted per type_id and
  the id is translated to its name through the dump's 'types' table.
  When the dump has a 'devices' entry, one extra item with type
  'devices' and the number of devices is appended.

  Raises RuntimeError if the command fails or the 'types' item is
  missing or null.
  '''
  rc, o, e = run_command(['ceph', 'osd', 'crush', 'dump'])
  # Exit codes are compared by value, not by object identity.
  if rc != 0:
    raise RuntimeError("\'ceph osd crush dump\' failed - " + e)

  crush_dump = json.loads(o)
  # .get() so that an absent key yields the RuntimeError below rather
  # than an unrelated KeyError.
  if crush_dump.get('types') is None:
    raise RuntimeError("\'types\' item missing in \'ceph osd crush dump\'")

  type_names = dict((t['type_id'], t['name']) for t in crush_dump['types'])
  types_counter = Counter([bucket['type_id']
                           for bucket in crush_dump['buckets']])

  crush_map = [{'type': type_names[type_id], 'count': count}
               for type_id, count in types_counter.items()]

  if 'devices' in crush_dump:
    crush_map.append({'type': 'devices',
                      'count': len(crush_dump['devices'])})

  return crush_map

def get_osd_dump_info():
  '''Return (created, pool_meta) taken from 'ceph osd dump -f json'.

  created is the dump's 'created' value.  pool_meta is a list with one
  {'id', 'type', 'size'} dict per entry of the dump's 'pools' list,
  where 'id' is the entry's 'pool' field.

  Raises RuntimeError if the command exits with a non-zero code.
  '''
  rc, o, e = run_command(['ceph', 'osd', 'dump', '-f', 'json'])
  # Exit codes are compared by value, not by object identity.
  if rc != 0:
    raise RuntimeError("\'ceph osd dump\' failed - " + e)

  oj = json.loads(o)
  pool_meta = [{'id': p['pool'], 'type': p['type'], 'size': p['size']}
               for p in oj['pools']]

  return oj['created'], pool_meta

def get_sysinfo(max_osds):
  '''Tally platform information reported by 'ceph osd metadata'.

  max_osds -- number of OSD ids to query; ids 0 .. max_osds-1 are tried.

  Returns a dict with the keys os_info, kernel_versions, kernel_types,
  distros, cpus, cpu_archs and ceph_versions.  Each value is a list of
  {<label>: value, 'count': n} dicts giving how many OSDs reported that
  value.  If no queried OSD returned metadata, a notice is written to
  stderr and an empty dict is returned.
  '''
  # One tally per published category: reported value -> number of OSDs.
  os_names = {}
  kern_version = {}
  kern_description = {}
  distro = {}
  cpu = {}
  arch = {}
  ceph_version = {}

  def tally(counts, key):
    counts[key] = counts.get(key, 0) + 1

  osd_metadata_available = False
  for osd_id in range(max_osds):
    rc, o, e = run_command(['ceph', 'osd', 'metadata', str(osd_id)])
    # Exit codes are compared by value, not by object identity.
    if rc != 0:
      # Best effort: an id that yields no metadata is skipped.
      continue
    osd_metadata_available = True

    jmeta = json.loads(o)

    # Third whitespace-separated field of the version string, with the
    # fourth field appended when there is one.
    version = jmeta['ceph_version'].split()
    cv = version[2]
    if len(version) > 3:
      cv += version[3]

    tally(ceph_version, cv)
    tally(os_names, jmeta['os'])
    tally(kern_version, jmeta['kernel_version'])
    tally(kern_description, jmeta['kernel_description'])

    # Distro fields are optional: if any is absent the OSD is simply not
    # counted in the distro tally.
    try:
      dstr = jmeta['distro'] + ' '
      dstr += jmeta['distro_version'] + ' '
      dstr += jmeta['distro_codename'] + ' ('
      dstr += jmeta['distro_description'] + ')'
      tally(distro, dstr)
    except KeyError:
      pass

    tally(cpu, jmeta['cpu'])
    tally(arch, jmeta['arch'])

  sysinfo = {}
  if not osd_metadata_available:
    sys.stderr.write("'ceph osd metadata' is not available at all\n")
    return sysinfo

  def jsonify(type_count, name, type_name):
    sysinfo[name] = [{type_name: k, 'count': v}
                     for k, v in type_count.items()]

  jsonify(os_names, 'os_info', 'os')
  jsonify(kern_version, 'kernel_versions', 'version')
  jsonify(kern_description, 'kernel_types', 'type')
  jsonify(distro, 'distros', 'distro')
  jsonify(cpu, 'cpus', 'cpu')
  jsonify(arch, 'cpu_archs', 'arch')
  jsonify(ceph_version, 'ceph_versions', 'version')
  return sysinfo

def get_ownership_info():
  '''Return the ownership metadata stored under CLUSTER_OWNERSHIP_NAME.

  The stored text is the str() of a dict (see update_metadata) and is
  parsed back with ast.literal_eval.  An empty dict is returned when
  'ceph config-key get' exits with a non-zero code.
  '''
  rc, o, e = run_command(['ceph', 'config-key', 'get',
                          CLUSTER_OWNERSHIP_NAME])
  # Exit codes are compared by value, not by object identity.
  if rc != 0:
    return {}

  return ast.literal_eval(o)

def output_json():
  '''Assemble the report and return it as (json_text, url).

  json_text is the indented JSON dump of the collected cluster data.
  url is the 'url' entry of the ownership metadata, or None when there is
  none; it is removed from the ownership dict before the dump is made,
  so it does not appear in json_text.
  '''
  report = {}
  publish_url = None

  report['uuid'] = get_uuid()
  counts = get_nums()
  osd_total = int(counts['num_osds'])
  report['components_count'] = counts
  report['crush_types'] = get_crush_types()
  created, pool_meta = get_osd_dump_info()
  report['cluster_creation_date'] = created
  report['pool_metadata'] = pool_meta
  report['sysinfo'] = get_sysinfo(osd_total)

  ownership = get_ownership_info()
  if ownership is not None:
    # The same dict object is stored in the report, so popping 'url'
    # below also removes it from what gets serialised.
    report['ownership'] = ownership
    if 'url' in ownership:
      publish_url = ownership.pop('url')

  json_text = json.dumps(report, indent=2, separators=(',', ': '))
  return json_text, publish_url

def describe_usage():
  '''Write the command-line help text to stderr.'''
  # Each entry is written followed by a newline; entries that already end
  # in "\n" therefore produce an extra blank line.
  help_lines = [
    "Usage:",
    "======\n",
    sys.argv[0] + " [-v|--verbose] [<commands> [command-options]]\n",
    "without any option, shows the data to be published and do nothing",
    "",
    "-v|--verbose: toggle verbose output on stdout",
    "",
    "commands:",
    "publish - publish the brag report to the server",
    "update-metadata <update-metadata-options> - Update",
    "         ownership information for bragging",
    "clear-metadata - Clear information set by update-metadata",
    "unpublish --yes-i-am-shy - delete the brag report from the server",
    "",
    "update-metadata options:",
    "--name=  - Name of the cluster",
    "--organization= - Name of the organization",
    "--email= - Email contact address",
    "--description= - Reporting use-case",
    "--url= - The URL that is used to publish and unpublish",
    "",
  ]
  for text in help_lines:
    sys.stderr.write(text + "\n")

def update_metadata():
  '''Merge --key=value options from the command line into the stored
  ownership metadata.

  Options are read from sys.argv[2:].  Accepted keys are name,
  organization, email, description and url.  The merged dict is written
  back with 'ceph config-key put' as its str() representation.

  Returns the exit code of that command, or 22 after printing the usage
  text when an argument is malformed or names an unknown key.
  '''
  possibles = ['name', 'organization', 'email', 'description', 'url']

  # get the existing values
  info = get_ownership_info()

  for arg in sys.argv[2:]:
    # The key stops at the first '=' so that values may themselves contain
    # '=' (e.g. --url=http://host/path?a=b).  The previous greedy \S+
    # split on the last '=' and rejected such options as unknown.
    mo = re.match(r"--([^=\s]+)=(.*)", arg)
    if not mo:
      describe_usage()
      return 22

    k = mo.group(1)
    v = mo.group(2)

    if k not in possibles:
      sys.stderr.write("Unexpect option --" + k + "\n")
      describe_usage()
      return 22
    info[k] = v

  rc, o, e = run_command(['ceph', 'config-key', 'put',
                          CLUSTER_OWNERSHIP_NAME, str(info)])
  return rc

def clear_metadata():
  '''Delete the stored ownership metadata.

  Runs 'ceph config-key del' on CLUSTER_OWNERSHIP_NAME and returns that
  command's exit code.
  '''
  delete_cmd = ['ceph', 'config-key', 'del', CLUSTER_OWNERSHIP_NAME]
  status, _out, _err = run_command(delete_cmd)
  return status

def publish():
  '''PUT the report to the URL stored in the ownership metadata.

  Returns 0 when the server answers 201.  Returns 1 when no URL has been
  set or the server answers with any other status; the reason is written
  to stderr.
  '''
  data, url = output_json()
  if url is None:
    sys.stderr.write("Cannot publish until a URL is set using update-metadata\n")
    return 1

  if verbose:
    sys.stdout.write("PUT " + str(url) + " : " + str(data) + "\n")
  req = requests.put(url, data=data)
  # Status codes are compared by value; "is not 201" relied on CPython's
  # small-integer cache.
  if req.status_code != 201:
    sys.stderr.write("Failed to publish, server responded with code " +
                     str(req.status_code) + "\n")
    body = req.text
    if not isinstance(body, str):
      # On Python 2 the body is a unicode object; encode it explicitly so
      # a non-ASCII reply cannot raise while the failure is reported.
      body = body.encode('utf-8', 'replace')
    sys.stderr.write(body + "\n")
    return 1

  return 0

def unpublish():
  '''Ask the server to delete this cluster's report.

  Requires '--yes-i-am-shy' as sys.argv[2].  Sends a DELETE request,
  with the cluster UUID as the 'uuid' query parameter, to the URL stored
  in the ownership metadata.

  Returns 0 when the server answers 200, 22 when the confirmation flag is
  missing, and 1 when no URL is stored or the server answers with any
  other status.
  '''
  if len(sys.argv) <= 2 or sys.argv[2] != '--yes-i-am-shy':
    sys.stderr.write("unpublish should be followed by --yes-i-am-shy\n")
    return 22

  owner = get_ownership_info()
  # A single lookup covers both "no metadata" and "no url key".  The
  # earlier code flagged a None owner and then indexed it anyway, which
  # would have raised TypeError.
  url = owner.get('url') if isinstance(owner, dict) else None
  if url is None:
    sys.stderr.write("URL is not updated yet\n")
    return 1

  # Named cluster_uuid so the local does not shadow the uuid module.
  cluster_uuid = get_uuid()

  req = requests.delete(url, params={'uuid': cluster_uuid})
  # Status codes are compared by value, not by object identity.
  if req.status_code != 200:
    sys.stderr.write("Failed to unpublish, server responsed with code " +
                     str(req.status_code) + "\n")
    return 1

  return 0

def main():
  '''Command-line entry point; returns the process exit status.

  An optional leading -v/--verbose switches on verbose output and is
  removed from sys.argv.  With no further argument the report is printed
  to stdout.  Otherwise sys.argv[1] selects update-metadata,
  clear-metadata, publish or unpublish.  An unknown command prints the
  usage text and returns 22.
  '''
  # Declared at function scope so the binding is unambiguous for the
  # whole body rather than hidden inside a conditional.
  global verbose

  if len(sys.argv) > 1 and sys.argv[1] in ('--verbose', '-v'):
    verbose = True
    sys.argv.pop(1)

  # Argument count is compared by value; "is 1" relied on CPython's
  # small-integer cache.
  if len(sys.argv) == 1:
    sys.stdout.write(output_json()[0] + "\n")
    return 0

  commands = {
    'update-metadata': update_metadata,
    'clear-metadata': clear_metadata,
    'publish': publish,
    'unpublish': unpublish,
  }
  handler = commands.get(sys.argv[1])
  if handler is None:
    describe_usage()
    return 22
  return handler()

# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == "__main__":
  exit_status = main()
  sys.exit(exit_status)
